#-------------------------------------------------------#
#   Quality Assessment of the Academic Freedom Index    #
#-------------------------------------------------------#

# Title: "Quality Assessment of the Academic Freedom Index: Strengths, Weaknesses, and How Best to Use It" 
# Authors: "Lott, Lars", "Spannagel, Janika"
# date: 2024-09-19
# journal: Perspectives on Politics
# DOI: TBA
# written under "R and RStudio (4.4.1)"


#### Preliminaries ####

R.version$version.string

# clear workspace
rm(list=ls())

# load packages
library(here)
library(tidyverse)
library(ggpubr)
library(countrycode)
library(ggrepel)
library(ggthemes)
library(lubridate)
library(dotwhisker)
library(estimatr)
library(texreg)
library(readxl)
library(haven)
library(rstan)
library(truncnorm)
library(abind)
library(matrixStats)
library(ggeffects)
library(viridis)

set.seed(1234)

#---------------------------------------------------#
#--------------------load data ---------------------#
#---------------------------------------------------#

#load coder-level data 
coder_level_ds <- read.csv(file.path("data/vdem_v13",  "vdem_13_coder_level", "coder_level_ds_v13.csv"))
gc()

#load coder characteristics
coder_characteristics <- readRDS(file.path("data\vdem_v13", "coder_characteristics/academzz_all.rds"))

# load V-Dem dataset 
vdem_full_v13 <- readRDS(file.path("data/vdem_v13", "vdem_13_cy", "V-Dem-CY-Full+Others-v13.rds"))


#---------------------------------------------------#
#-------------Predicting Respondent perceptions ----#
#---------------------------------------------------#

### Load posteriors

v2cafexch <- readRDS("data/posteriors/v13/v2cafexch_posterior.rds") # freedom of academic exchange and dissemination
v2cafres <- readRDS("data/posteriors/v13/v2cafres_posterior.rds") # freedom to research and teach
v2cainsaut <- readRDS("data/posteriors/v13/v2cainsaut_posterior.rds") # institutional autonomy
v2casurv <- readRDS("data/posteriors/v13/v2casurv_posterior.rds") #campus integrity
v2clacfree <- readRDS("data/posteriors/v13/v2clacfree_posterior.rds") # freedom of academic and cultural expression



#### First step: Calculate perception estimates for raters ####

vnames <- c("v2cafexch", "v2cafres", "v2cainsaut",
            "v2casurv", "v2clacfree")

v2cafexch$posterior

#### Create a function to create rater perceptions for each variable ####

## Perceptions have the conditional distribution N(Z_i, \sigma_r^2), truncated to
## \gamma_{j,{y_{ir}-1, y_{ir}}}.
## \sigma_r^2 = (1/beta_r)^2 
## \gamma_{r,k} = \tau_{r,k} / \sigma_r
## i is the country-year, r is the rater
## truncates min(beta) at 0.07 to get rid of really annoying edge cases

gen.perceptions <- function (name, thin=4, minbeta=0.07) {
  post <- rstan::extract(get(name)$posterior)
  
  coder.table <- get(name)$post.sample$gamma %>%
    rownames %>% 
    grep("1[.]", ., value = TRUE) %>%
    gsub("1[.]", "", .) %>%
    data.frame(
      coder_id = as.numeric(.), 
      rater_id = seq_along(.), 
      row.names = NULL,
      stringsAsFactors = FALSE)
  
  y <- get(name)$model_input$y
  r <- get(name)$model_input$j_id
  i <- get(name)$model_input$sy_id
  post$beta[post$beta < minbeta] <- minbeta
  sigma <- 1/post$beta
  tau <- array(apply(post$gamma, 3, function (x) x * sigma), dim=dim(post$gamma))
  tau <- abind(matrix(-Inf, nrow=nrow(sigma), ncol=ncol(sigma)),
               tau,
               matrix(Inf, nrow=nrow(sigma), ncol=ncol(sigma)), along=3)
  cid <-  sapply(r, function (x) coder.table$coder_id[coder.table$rater_id == x])
  pcept <- sapply(seq(1, nrow(post$Z), thin), function (sim) {
    tau.obs <- sapply(1:length(y), function (obs) tau[sim, r[obs], y[obs]])
    taup1.obs <- sapply(1:length(y), function(obs) tau[sim, r[obs], y[obs]+1])
    rtruncnorm(length(y), tau.obs, taup1.obs,
               post$Z[sim,i], (sigma[sim, r])^2)
  })
  rownames(pcept) <- paste(cid, i, sep = "_")
  pcept
}

################################################################################
#### Calculate Coder perceptions of country_dates and extract coder Perceptions per variable ####
################################################################################


#### v2cafexch ####

v2cafexch.perceptions <- gen.perceptions("v2cafexch")

v2cafexch.perceptionsmean <- sapply(paste("v2cafexch", "perceptions", sep="."), function (var)
  apply(get(var), 1, mean))

v2cafexch.perceptionssd <- sapply(paste("v2cafexch", "perceptions", sep="."), function (var)
  apply(get(var), 1, sd))

v2cafexch.perceptionsmean <- as.data.frame(v2cafexch.perceptionsmean)
v2cafexch.perceptionsmean <- tibble::rownames_to_column(v2cafexch.perceptionsmean, "id_var")

v2cafexch.perceptionssd <- as.data.frame(v2cafexch.perceptionssd)
v2cafexch.perceptionssd <- tibble::rownames_to_column(v2cafexch.perceptionssd, "id_var")

v2cafexch.perceptionsmean %<>% rename(v2cafexch_perceptmean = v2cafexch.perceptions)
v2cafexch.perceptionssd %<>% rename(v2cafexch_perceptsd = v2cafexch.perceptions)

v2cafexch.perceptions <- v2cafexch.perceptionsmean %>%
  left_join(v2cafexch.perceptionssd, by = "id_var")

v2cafexch.perceptions$country_date_id <- as.numeric(map(str_split(v2cafexch.perceptions$id_var, "_"), 2))
v2cafexch.perceptions$coder_id <- as.numeric(map(str_split(v2cafexch.perceptions$id_var, "_"), 1))
v2cafexch.perceptions <- v2cafexch.perceptions %>%
  dplyr::select(id_var, country_date_id, coder_id, everything())

#### v2cafres #####

v2cafres.perceptions <- gen.perceptions("v2cafres")

v2cafres.perceptionsmean <- sapply(paste("v2cafres", "perceptions", sep="."), function (var)
  apply(get(var), 1, mean))

v2cafres.perceptionssd <- sapply(paste("v2cafres", "perceptions", sep="."), function (var)
  apply(get(var), 1, sd))

v2cafres.perceptionsmean <- as.data.frame(v2cafres.perceptionsmean)
v2cafres.perceptionsmean <- tibble::rownames_to_column(v2cafres.perceptionsmean, "id_var")

v2cafres.perceptionssd <- as.data.frame(v2cafres.perceptionssd)
v2cafres.perceptionssd <- tibble::rownames_to_column(v2cafres.perceptionssd, "id_var")

v2cafres.perceptionsmean %<>% rename(v2cafres_perceptmean = v2cafres.perceptions)
v2cafres.perceptionssd %<>% rename(v2cafres_perceptsd = v2cafres.perceptions)

v2cafres.perceptions <- v2cafres.perceptionsmean %>%
  left_join(v2cafres.perceptionssd, by = "id_var")

v2cafres.perceptions$country_date_id <- as.numeric(map(str_split(v2cafres.perceptions$id_var, "_"), 2))
v2cafres.perceptions$coder_id <- as.numeric(map(str_split(v2cafres.perceptions$id_var, "_"), 1))
v2cafres.perceptions <- v2cafres.perceptions %>%
  dplyr::select(id_var, country_date_id, coder_id, everything())

#### v2cainsaut#####

v2cainsaut.perceptions <- gen.perceptions("v2cainsaut")

v2cainsaut.perceptionsmean <- sapply(paste("v2cainsaut", "perceptions", sep="."), function (var)
  apply(get(var), 1, mean))

v2cainsaut.perceptionssd <- sapply(paste("v2cainsaut", "perceptions", sep="."), function (var)
  apply(get(var), 1, sd))

v2cainsaut.perceptionsmean <- as.data.frame(v2cainsaut.perceptionsmean)
v2cainsaut.perceptionsmean <- tibble::rownames_to_column(v2cainsaut.perceptionsmean, "id_var")

v2cainsaut.perceptionssd <- as.data.frame(v2cainsaut.perceptionssd)
v2cainsaut.perceptionssd <- tibble::rownames_to_column(v2cainsaut.perceptionssd, "id_var")

v2cainsaut.perceptionsmean %<>% rename(v2cainsaut_perceptmean = v2cainsaut.perceptions)
v2cainsaut.perceptionssd %<>% rename(v2cainsaut_perceptsd = v2cainsaut.perceptions)

v2cainsaut.perceptions <- v2cainsaut.perceptionsmean %>%
  left_join(v2cainsaut.perceptionssd, by = "id_var")

v2cainsaut.perceptions$country_date_id <- as.numeric(map(str_split(v2cainsaut.perceptions$id_var, "_"), 2))
v2cainsaut.perceptions$coder_id <- as.numeric(map(str_split(v2cainsaut.perceptions$id_var, "_"), 1))
v2cainsaut.perceptions <- v2cainsaut.perceptions %>%
  dplyr::select(id_var, country_date_id, coder_id, everything())

#### v2casurv #####

v2casurv.perceptions <- gen.perceptions("v2casurv")

v2casurv.perceptionsmean <- sapply(paste("v2casurv", "perceptions", sep="."), function (var)
  apply(get(var), 1, mean))

v2casurv.perceptionssd <- sapply(paste("v2casurv", "perceptions", sep="."), function (var)
  apply(get(var), 1, sd))

v2casurv.perceptionsmean <- as.data.frame(v2casurv.perceptionsmean)
v2casurv.perceptionsmean <- tibble::rownames_to_column(v2casurv.perceptionsmean, "id_var")

v2casurv.perceptionssd <- as.data.frame(v2casurv.perceptionssd)
v2casurv.perceptionssd <- tibble::rownames_to_column(v2casurv.perceptionssd, "id_var")

v2casurv.perceptionsmean %<>% rename(v2casurv_perceptmean = v2casurv.perceptions)
v2casurv.perceptionssd %<>% rename(v2casurv_perceptsd = v2casurv.perceptions)

v2casurv.perceptions <- v2casurv.perceptionsmean %>%
  left_join(v2casurv.perceptionssd, by = "id_var")

v2casurv.perceptions$country_date_id <- as.numeric(map(str_split(v2casurv.perceptions$id_var, "_"), 2))
v2casurv.perceptions$coder_id <- as.numeric(map(str_split(v2casurv.perceptions$id_var, "_"), 1))
v2casurv.perceptions <- v2casurv.perceptions %>%
  dplyr::select(id_var, country_date_id, coder_id, everything())


#### v2clacfree #####

v2clacfree.perceptions <- gen.perceptions("v2clacfree")

v2clacfree.perceptionsmean <- sapply(paste("v2clacfree", "perceptions", sep="."), function (var)
  apply(get(var), 1, mean))

v2clacfree.perceptionssd <- sapply(paste("v2clacfree", "perceptions", sep="."), function (var)
  apply(get(var), 1, sd))

v2clacfree.perceptionsmean <- as.data.frame(v2clacfree.perceptionsmean)
v2clacfree.perceptionsmean <- tibble::rownames_to_column(v2clacfree.perceptionsmean, "id_var")

v2clacfree.perceptionssd <- as.data.frame(v2clacfree.perceptionssd)
v2clacfree.perceptionssd <- tibble::rownames_to_column(v2clacfree.perceptionssd, "id_var")

v2clacfree.perceptionsmean %<>% rename(v2clacfree_perceptmean = v2clacfree.perceptions)
v2clacfree.perceptionssd %<>% rename(v2clacfree_perceptsd = v2clacfree.perceptions)

v2clacfree.perceptions <- v2clacfree.perceptionsmean %>%
  left_join(v2clacfree.perceptionssd, by = "id_var")

v2clacfree.perceptions$country_date_id <- as.numeric(map(str_split(v2clacfree.perceptions$id_var, "_"), 2))
v2clacfree.perceptions$coder_id <- as.numeric(map(str_split(v2clacfree.perceptions$id_var, "_"), 1))
v2clacfree.perceptions <- v2clacfree.perceptions %>%
  dplyr::select(id_var, country_date_id, coder_id, everything())


################################################################################
#### Merge var_perceptions data with coder_level_ds                         ####
################################################################################

v2cafexch.country_date_translation <- v2cafexch$country_date_translation
v2cafres.country_date_translation <- v2cafres$country_date_translation
v2cainsaut.country_date_translation <- v2cainsaut$country_date_translation
v2casurv.country_date_translation <- v2casurv$country_date_translation
v2clacfree.country_date_translation <- v2clacfree$country_date_translation

rm(list = ls()[grep("perceptionsmean", ls())])
rm(list = ls()[grep("perceptionssd", ls())])

v2cafexch.perceptions %<>%
  left_join(v2cafexch.country_date_translation, by = "country_date_id") %>%
  dplyr::select(coder_id, country_date, starts_with("v2ca")) %>%
  mutate(country_text_id = as.character(map(str_split(country_date, " "), 1)), 
         historical_date = as.character(map(str_split(country_date, " "), 2))) %>%
  dplyr::select(-country_date)

v2cafres.perceptions %<>%
  left_join(v2cafres.country_date_translation, by = "country_date_id") %>%
  dplyr::select(coder_id, country_date, starts_with("v2ca")) %>%
  mutate(country_text_id = as.character(map(str_split(country_date, " "), 1)), 
         historical_date = as.character(map(str_split(country_date, " "), 2))) %>%
  dplyr::select(-country_date)


v2cainsaut.perceptions %<>%
  left_join(v2cainsaut.country_date_translation, by = "country_date_id") %>%
  dplyr::select(coder_id, country_date, starts_with("v2ca")) %>%
  mutate(country_text_id = as.character(map(str_split(country_date, " "), 1)), 
         historical_date = as.character(map(str_split(country_date, " "), 2))) %>%
  dplyr::select(-country_date)


v2casurv.perceptions %<>%
  left_join(v2casurv.country_date_translation, by = "country_date_id") %>%
  dplyr::select(coder_id, country_date, starts_with("v2ca")) %>%
  mutate(country_text_id = as.character(map(str_split(country_date, " "), 1)), 
         historical_date = as.character(map(str_split(country_date, " "), 2))) %>%
  dplyr::select(-country_date)


v2clacfree.perceptions %<>%
  left_join(v2clacfree.country_date_translation, by = "country_date_id") %>%
  dplyr::select(coder_id, country_date, starts_with("v2cl")) %>%
  mutate(country_text_id = as.character(map(str_split(country_date, " "), 1)), 
         historical_date = as.character(map(str_split(country_date, " "), 2))) %>%
  dplyr::select(-country_date)

v2cafexch.perceptions_vignettes <- v2cafexch.perceptions %>%
 filter(historical_date == "NULL") %>%
 mutate(VAR = "v2cafexch") %>%
 rename(perceptmean = v2cafexch_perceptmean, 
                perceptsd = v2cafexch_perceptsd)

v2cafres.perceptions_vignettes <- v2cafres.perceptions %>%
  filter(historical_date == "NULL") %>%
  mutate(VAR = "v2cafres") %>%
  rename(perceptmean = v2cafres_perceptmean, 
         perceptsd = v2cafres_perceptsd)

v2cainsaut.perceptions_vignettes <- v2cainsaut.perceptions %>%
  filter(historical_date == "NULL") %>%
  mutate(VAR = "v2cainsaut") %>%
  rename(perceptmean = v2cainsaut_perceptmean, 
         perceptsd = v2cainsaut_perceptsd)

v2casurv.perceptions_vignettes <- v2casurv.perceptions %>%
  filter(historical_date == "NULL") %>%
  mutate(VAR = "v2casurv") %>%
  rename(perceptmean = v2casurv_perceptmean, 
         perceptsd = v2casurv_perceptsd)

v2clacfree.perceptions_vignettes <- v2clacfree.perceptions %>%
  filter(historical_date == "NULL") %>%
  mutate(VAR = "v2clacfree") %>%
  rename(perceptmean = v2clacfree_perceptmean, 
         perceptsd = v2clacfree_perceptsd)

data_indicators <- rbind(v2cafexch.perceptions_vignettes, v2cafres.perceptions_vignettes, 
                         v2cainsaut.perceptions_vignettes,v2casurv.perceptions_vignettes, 
                         v2clacfree.perceptions_vignettes)

data_indicators <- data_indicators %>%
  group_by(country_text_id) %>%
  mutate(mean_Perception = mean(perceptmean))

#### merge coder characteristics ####

coder_characteristics_wide <- coder_characteristics %>%
  pivot_wider(id_cols = coder_id, names_from = question, values_from = code)  %>%
  mutate(reside_in_country = ifelse(reside == main_country, 1, 0)) # build reside_in_country variable 

summary(coder_characteristics_wide)

coder_characteristics_wide$gender  <- as.factor(coder_characteristics_wide$gender)
coder_characteristics_wide$government_employment  <- as.factor(coder_characteristics_wide$government_employment)
coder_characteristics_wide$age_block  <- as.factor(coder_characteristics_wide$age_block)
coder_characteristics_wide$phd  <- as.factor(coder_characteristics_wide$phd)
coder_characteristics_wide$reside_in_country  <- as.factor(coder_characteristics_wide$reside_in_country)

coder_level_ds_subset_coder_id <- coder_level_ds %>%
  group_by(coder_id) %>%
  dplyr::summarize(v2zztimespent = mean(v2zztimespent), 
            v2zzsatisf = mean(v2zzsatisf))


data_indicators <- data_indicators %>%
  left_join(coder_characteristics_wide, by = c("coder_id")) %>%
  mutate(respondent_deviation = mean_Perception - perceptmean) %>%
  left_join(coder_level_ds_subset_coder_id,  by = c("coder_id"))

summary(data_indicators)


## How many coders coded Vignettes ##

num_of_coders_vig <- data_indicators %>%
  distinct(coder_id) %>%
  ungroup %>%
  dplyr::select(coder_id)

diff_coders <- setdiff(coder_characteristics_wide$coder_id, num_of_coders_vig$coder_id) 

diff_coders2 <- setdiff(num_of_coders_vig$coder_id, coder_characteristics_wide$coder_id) 



## pooled regression analysis ##
m1 <-  lm_robust(respondent_deviation ~ gender + age_block + phd + government_employment + 
                 VAR,
                 clusters = country_text_id, 
                 data = data_indicators)
summary(m1)

m2 <-  lm_robust(respondent_deviation ~ gender + age_block + phd + government_employment + 
                   v2zztimespent + v2zzsatisf + VAR,
                 clusters = country_text_id, 
                 data = data_indicators)
summary(m2)



## Table H1 Supplementary Appendix ##
texreg(list(m1, m2), 
       head.tag = TRUE, body.tag = TRUE,
       file = "outputs_original_data/Table_H1.tex",
       digits = 3,
       custom.coef.names = c("Intercept",
                             "Women", 
                             "Age Block 2", 
                             "Age Block 3", 
                             "PhD degree", 
                             "Goverment employee", 
                             "v2cafres", 
                             "v2cainsaut", 
                             "v2casurv", 
                             "v2clacfree", 
                             "Time spent for coding", 
                             "Satisfaction with coding experience"),
       booktabs = TRUE,
       use.packages = FALSE,
       caption = "Linear Models predicting respondents deviations from mean",
       fontsize = "scriptsize",
       stars = c(0.001, 0.01, 0.05), 
       label = "Table:H1", 
       longtable = TRUE)
modelsummary::modelsummary(list(m1, m2))


##Margins relationship between income and inequality
m1_me <- ggpredict(m1, terms = c("gender[0,1]"))

levels(m1_me$x) <- c("Male", "Female")

f1a <- ggplot(m1_me, aes(x = x, y = predicted)) +
  geom_point() +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high)) +
  theme_bw() +
  ggtitle("") +
  labs(color= "", fill = "",
       y= "Predicted Respondent Deviation",
       x = "Gender of Respondent") + 
  theme(legend.position="bottom") +
  scale_color_grey() +
  scale_fill_grey()

f1a

ggsave("outputs_original_data/Figure_H1.pdf", width = 20,
       height = 12,
       units = c("cm"), dpi = 1000)
ggsave("outputs_original_data/Figure_H1.png", width = 20,
       height = 12,
       units = c("cm"), dpi = 1000)




